# knitr: center all figures in the rendered document.
knitr::opts_chunk$set(fig.align="center")
# Bayesian modeling stack (rstanarm/brms) plus tidyverse tooling for
# wrangling posterior draws (tidybayes, modelr) and plotting
# (ggplot2, bayesplot, gganimate).
library(rstanarm)
library(tidyverse)
library(tidybayes)
library(modelr)
library(ggplot2)
library(magrittr)
library(emmeans)
library(bayesplot)
library(brms)
library(gganimate)
# Light ggplot2 theme for every plot in this document.
theme_set(theme_light())
# Project-local helpers used below (posterior_draws_plot,
# expected_diff_in_mean_plot) — NOTE(review): defined outside this file.
source('helper_functions.R')
In our experiment, we used a visualization recommendation algorithm (composed of one search algorithm and one oracle algorithm) to generate visualizations for the user on one of two datasets. We then measured the user’s time to complete each of four tasks: 1. Find Extremum 2. Retrieve Value 3. Prediction 4. Exploration
Given a search algorithm (bfs or dfs), an oracle (compassql or dziban), and a dataset (birdstrikes or movies), we would like to predict the time it takes the average user to complete each task. In addition, we would like to know if the choice of search algorithm and oracle has any meaningful impact on a user’s completion time for each of these four tasks, and if the participant’s group (student or professional) is associated with a difference in performance.
# Load per-participant task completion times and encode the experimental
# factors for modeling. (Uses `<-` for assignment and a single across()
# instead of repeating as.factor per column.)
time_data <- read.csv('split_by_participant_groups/completion_time.csv')
time_data <- time_data %>%
  mutate(across(c(dataset, oracle, search, task), as.factor))
# Condition label combines oracle and search, e.g. "compassql bfs".
time_data$condition <- paste(time_data$oracle, time_data$search)
# Canonical task order used for per-task subsetting below.
task_list <- c("1. Find Extremum",
               "2. Retrieve Value",
               "3. Prediction",
               "4. Exploration")
# Fixed RNG seed for reproducible sampling and posterior draws.
seed <- 12
The weakly informative prior (normal(360.48, 224.40)) was derived from pilot studies and describes the distribution of time (in seconds) needed to complete any given task under any condition. Because our pilot study was small, we chose to aggregate these measurements (rather than deriving separate priors for each task) to minimize the effect of biases.
The lognormal family was selected to prevent our model from predicting completion times of less than zero seconds.
# Bayesian multilevel model of task completion time: oracle x search
# interaction plus dataset, task, and participant-group fixed effects,
# with a random intercept per participant.
model <- brm(
formula = bf(
completion_time ~ oracle * search + dataset + task + participant_group + (1 | participant_id)
),
# Weakly informative prior derived from pilot-study timings (seconds).
# NOTE(review): with family = lognormal(), the intercept is on the log
# scale, so a prior stated in seconds is extremely diffuse there
# (posterior intercept ~ 5.3 log-seconds) — confirm this is intended.
prior = prior(normal(360.48, 224.40), class = "Intercept"),
chains = 2,
cores = 2,
iter = 2500,
warmup = 1000,
data = time_data,
# lognormal() keeps predicted completion times strictly positive.
family = lognormal(),
# Cache the fit to disk; subsequent runs reload instead of resampling.
file="models/time",
seed = seed
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/usr/local/include -fPIC -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling
In the summary table, we want to see Rhat values close to 1.0 and Bulk_ESS in the thousands.
# Convergence diagnostics: expect Rhat ~ 1.00 and Bulk_ESS in the thousands.
summary(model)
## Family: lognormal
## Links: mu = identity; sigma = identity
## Formula: completion_time ~ oracle * search + dataset + task + participant_group + (1 | participant_id)
## Data: time_data (Number of observations: 288)
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~participant_id (Number of levels: 72)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.19 0.04 0.11 0.27 1.00 850 783
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 5.29 0.09 5.12 5.48 1.00 2514
## oracledziban 0.03 0.09 -0.16 0.20 1.00 2078
## searchdfs 0.10 0.09 -0.08 0.27 1.00 1887
## datasetmovies -0.08 0.07 -0.21 0.05 1.00 2915
## task2.RetrieveValue 0.01 0.07 -0.12 0.14 1.00 3854
## task3.Prediction 1.12 0.07 0.98 1.25 1.00 4287
## task4.Exploration 1.22 0.07 1.09 1.35 1.00 3413
## participant_groupstudent 0.06 0.07 -0.07 0.19 1.00 2846
## oracledziban:searchdfs -0.11 0.13 -0.37 0.14 1.00 1956
## Tail_ESS
## Intercept 2488
## oracledziban 2094
## searchdfs 1917
## datasetmovies 2439
## task2.RetrieveValue 2361
## task3.Prediction 2510
## task4.Exploration 2458
## participant_groupstudent 2506
## oracledziban:searchdfs 2082
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.40 0.02 0.37 0.45 1.00 1902 2080
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Trace plots help us check whether there is evidence of non-convergence for model.
# Trace and density plots per parameter; well-mixed, overlapping chains
# show no evidence of non-convergence.
plot(model)
In our pairs plots, we want to make sure we don’t have highly correlated parameters (highly correlated parameters means that our model has difficulty differentiating the effect of such parameters).
# Pairs plot of posterior samples for the population-level coefficients;
# strong pairwise correlations would indicate poorly separated effects.
# fixed = TRUE: treat `pars` as exact names rather than regular
# expressions (the task coefficient names contain literal dots).
pairs(
model,
pars = c("b_Intercept",
"b_datasetmovies",
"b_oracledziban",
"b_searchdfs",
"b_task2.RetrieveValue",
"b_task3.Prediction",
"b_task4.Exploration"),
fixed = TRUE
)
Using draws from the posterior, we can visualize parameter effects and average response. The thicker, shorter line represents the 95% credible interval, while the thinner, longer line represents the 50% credible interval.
(These won’t be outputted in this analysis document; the plots are saved to the “../plots/posterior_draws/time/” directory and the interval tables to the “../plot_data/posterior_draws/time/” directory.)
# Posterior expected completion times for every observed condition.
# re_formula = NA drops the participant random effect, giving
# population-level ("average user") predictions.
draw_data <- time_data %>%
  add_fitted_draws(model, seed = seed, re_formula = NA)

# For each task, save a posterior-draws plot (PNG) and its summary
# intervals (CSV). Fixes vs original: paste0 instead of paste(sep = "");
# the local variable no longer shadows base::plot; bare `plot` /
# `fit_info` expressions (no-ops inside a for loop) removed.
for (task_name in task_list) {
  draw_data_sub <- subset(draw_data, task == task_name)
  task_plot <- posterior_draws_plot(draw_data_sub, "dataset", FALSE,
                                    "Predicted Mean Completion Time (seconds)",
                                    "Oracle/Search Combination")
  # Build a file stem like "time_Find_Extremum" from the task label:
  # strip everything up to the "N." prefix, then spaces -> underscores.
  filename <- gsub("^.*\\.", "", task_name)
  filename <- gsub(" ", "_", filename)
  filename <- paste0("time", filename)
  ggsave(
    file = paste0(filename, ".png"),
    plot = task_plot,
    path = "../plots/posterior_draws/time", width = 7, height = 7
  )
  # 95% and 50% mean quantile intervals per oracle/search/dataset cell.
  fit_info <- draw_data_sub %>%
    group_by(search, oracle, dataset) %>%
    mean_qi(.value, .width = c(.95, .5))
  write.csv(fit_info,
            paste0("../plot_data/posterior_draws/time/", filename, ".csv"),
            row.names = FALSE)
}
Now let’s make a summary plot for the two tasks we care about (1. Find Extremum and 2. Retrieve Value) with predicted mean completion times for each oracle/search combination.
# Summary-plot data: keep only the two focal tasks, drop unused task
# levels, and capitalize the factor labels for display.
plot_data <- subset(draw_data,
                    task %in% c("1. Find Extremum", "2. Retrieve Value"))
plot_data[["task"]] <- factor(plot_data[["task"]])
# Per-column display relabeling; gsub coerces the factor columns to
# character, matching how downstream code consumes them.
relabel <- list(
  oracle  = c(compassql = "CompassQL", dziban = "Dziban"),
  search  = c(bfs = "BFS", dfs = "DFS"),
  dataset = c(birdstrikes = "Birdstrikes", movies = "Movies")
)
for (col in names(relabel)) {
  vals <- as.character(plot_data[[col]])
  for (old in names(relabel[[col]])) {
    vals <- gsub(old, relabel[[col]][[old]], vals)
  }
  plot_data[[col]] <- vals
}
# Capitalized column name so the legend title reads "Dataset".
plot_data[["Dataset"]] <- plot_data[["dataset"]]
# Two-line condition label, e.g. "CompassQL\nBFS".
plot_data[["condition"]] <- paste(plot_data[["oracle"]],
                                  plot_data[["search"]], sep = '\n')
# Combined posterior-draws plot colored by dataset.
draw_plot <- posterior_draws_plot(plot_data, "Dataset", TRUE,
                                  "Predicted Mean Completion Time (seconds)",
                                  "Oracle/Search Combination") +
  theme(axis.text.y = element_text(size = 12)) +
  scale_alpha(guide = 'none') +
  xlab("Predicted Mean Completion Time (seconds)")
draw_plot
## # A tibble: 32 x 10
## # Groups: search, oracle, dataset [8]
## search oracle dataset task .value .lower .upper .width .point .interval
## <chr> <chr> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 BFS Compas… Birdstr… 1. Find… 224. 186. 269. 0.95 mean qi
## 2 BFS Compas… Birdstr… 2. Retr… 226. 187. 272. 0.95 mean qi
## 3 BFS Compas… Movies 1. Find… 206. 170. 245. 0.95 mean qi
## 4 BFS Compas… Movies 2. Retr… 207. 171. 248. 0.95 mean qi
## 5 BFS Dziban Birdstr… 1. Find… 230. 189. 276. 0.95 mean qi
## 6 BFS Dziban Birdstr… 2. Retr… 231. 190. 279. 0.95 mean qi
## 7 BFS Dziban Movies 1. Find… 211. 173. 253. 0.95 mean qi
## 8 BFS Dziban Movies 2. Retr… 213. 174. 258. 0.95 mean qi
## 9 DFS Compas… Birdstr… 1. Find… 248. 204. 300. 0.95 mean qi
## 10 DFS Compas… Birdstr… 2. Retr… 250. 206. 302. 0.95 mean qi
## # … with 22 more rows
Next, we want to see if there is any significant difference in completion time between the two search algorithms (bfs and dfs), the two oracles (dziban and compassql), and the two participant groups (student and professional).
# Population-level fitted draws for the pairwise comparisons below.
# NOTE(review): identical to `draw_data` computed earlier — could be reused.
predictive_data <- time_data %>%
add_fitted_draws(model, seed = seed, re_formula = NA)
# Expected difference in mean completion time between search algorithms
# (bfs - dfs), per task. Helper from helper_functions.R returns $plot
# and $intervals.
search_differences <- expected_diff_in_mean_plot(predictive_data, "search", "Difference in Mean Completion Time (Seconds)", "Task", NULL)
## `summarise()` regrouping output by 'search', 'task' (override with `.groups` argument)
ggsave(file="search_time_differences.png", plot=search_differences$plot, path = "../plots/comparisons/time", width = 7, height = 7)
search_differences$plot
We can double-check the boundaries of the credible intervals to be sure whether or not the interval contains zero.
# Persist the interval table. The original call passed `sep = ""`, which
# write.csv ignores with a warning (write.csv fixes sep = ","); dropped.
write.csv(search_differences$intervals,
          "../plot_data/comparisons/time/search_time_differences.csv",
          row.names = FALSE)
search_differences$intervals
## # A tibble: 8 x 8
## # Groups: search [1]
## search task difference .lower .upper .width .point .interval
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 bfs - dfs 1. Find Extremum -11.3 -39.8 18.7 0.95 mean qi
## 2 bfs - dfs 2. Retrieve Value -11.3 -40.2 19.2 0.95 mean qi
## 3 bfs - dfs 3. Prediction -34.4 -121. 57.6 0.95 mean qi
## 4 bfs - dfs 4. Exploration -38.2 -136. 62.7 0.95 mean qi
## 5 bfs - dfs 1. Find Extremum -11.3 -21.2 -1.67 0.5 mean qi
## 6 bfs - dfs 2. Retrieve Value -11.3 -21.3 -1.69 0.5 mean qi
## 7 bfs - dfs 3. Prediction -34.4 -64.8 -5.16 0.5 mean qi
## 8 bfs - dfs 4. Exploration -38.2 -71.8 -5.61 0.5 mean qi
Let’s do the above, but split it by datasets.
# Search-algorithm differences, additionally split by dataset.
search_differences_split_by_dataset <- expected_diff_in_mean_plot(predictive_data, "search", "Difference in Mean Completion Time (Seconds)", "Task", "dataset")
## `summarise()` regrouping output by 'search', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_search_time_differences.png", plot=search_differences_split_by_dataset$plot, path = "../plots/comparisons/time", width = 7, height = 7)
search_differences_split_by_dataset$plot
Check intervals for search differences split by dataset.
# Save the dataset-split search intervals alongside the plot data.
write.csv(search_differences_split_by_dataset$intervals, "../plot_data/comparisons/time/search_time_differences_split_by_dataset.csv", row.names = FALSE)
search_differences_split_by_dataset$intervals
## # A tibble: 16 x 9
## # Groups: search, dataset [2]
## search dataset task difference .lower .upper .width .point .interval
## <chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 bfs - d… birdstr… 1. Find E… -11.8 -42.3 19.4 0.95 mean qi
## 2 bfs - d… birdstr… 2. Retrie… -11.8 -42.2 19.9 0.95 mean qi
## 3 bfs - d… birdstr… 3. Predic… -35.9 -128. 60.4 0.95 mean qi
## 4 bfs - d… birdstr… 4. Explor… -39.8 -142. 64.9 0.95 mean qi
## 5 bfs - d… movies 1. Find E… -10.8 -38.0 17.8 0.95 mean qi
## 6 bfs - d… movies 2. Retrie… -10.8 -38.5 18.3 0.95 mean qi
## 7 bfs - d… movies 3. Predic… -32.9 -116. 54.7 0.95 mean qi
## 8 bfs - d… movies 4. Explor… -36.5 -132. 58.9 0.95 mean qi
## 9 bfs - d… birdstr… 1. Find E… -11.8 -22.1 -1.71 0.5 mean qi
## 10 bfs - d… birdstr… 2. Retrie… -11.8 -22.3 -1.73 0.5 mean qi
## 11 bfs - d… birdstr… 3. Predic… -35.9 -67.7 -5.26 0.5 mean qi
## 12 bfs - d… birdstr… 4. Explor… -39.8 -74.6 -5.77 0.5 mean qi
## 13 bfs - d… movies 1. Find E… -10.8 -20.0 -1.61 0.5 mean qi
## 14 bfs - d… movies 2. Retrie… -10.8 -20.3 -1.60 0.5 mean qi
## 15 bfs - d… movies 3. Predic… -32.9 -61.7 -4.93 0.5 mean qi
## 16 bfs - d… movies 4. Explor… -36.5 -68.3 -5.32 0.5 mean qi
# Expected difference in mean completion time between oracles
# (dziban - compassql), per task.
oracle_differences <- expected_diff_in_mean_plot(predictive_data, "oracle", "Difference in Mean Completion Time (Seconds)", "Task", NULL)
## `summarise()` regrouping output by 'oracle', 'task' (override with `.groups` argument)
ggsave(file="oracle_time_differences.png", plot=oracle_differences$plot, path = "../plots/comparisons/time", width = 7, height = 7)
oracle_differences$plot
We can double-check the boundaries of the credible intervals to be sure whether or not the interval contains zero.
# Persist the interval table. The original call passed `sep = ""`, which
# write.csv ignores with a warning (write.csv fixes sep = ","); dropped.
write.csv(oracle_differences$intervals,
          "../plot_data/comparisons/time/oracle_time_differences.csv",
          row.names = FALSE)
oracle_differences$intervals
## # A tibble: 8 x 8
## # Groups: oracle [1]
## oracle task difference .lower .upper .width .point .interval
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 dziban - compa… 1. Find Extr… -6.70 -35.8 20.6 0.95 mean qi
## 2 dziban - compa… 2. Retrieve … -6.73 -36.7 21.3 0.95 mean qi
## 3 dziban - compa… 3. Prediction -20.4 -110. 63.7 0.95 mean qi
## 4 dziban - compa… 4. Explorati… -22.6 -121. 71.5 0.95 mean qi
## 5 dziban - compa… 1. Find Extr… -6.70 -16.3 2.96 0.5 mean qi
## 6 dziban - compa… 2. Retrieve … -6.73 -16.4 2.96 0.5 mean qi
## 7 dziban - compa… 3. Prediction -20.4 -49.4 9.14 0.5 mean qi
## 8 dziban - compa… 4. Explorati… -22.6 -55.3 9.97 0.5 mean qi
Let’s do the above, but split it by datasets.
# Oracle differences, additionally split by dataset.
oracle_differences_subset_split_by_dataset <- expected_diff_in_mean_plot(predictive_data, "oracle", "Difference in Mean Completion Time (Seconds)", "Task", "dataset")
## `summarise()` regrouping output by 'oracle', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_oracle_time_differences.png", plot=oracle_differences_subset_split_by_dataset$plot, path = "../plots/comparisons/time", width = 7, height = 7)
oracle_differences_subset_split_by_dataset$plot
Check intervals for oracle differences split by dataset.
# Save the dataset-split oracle intervals alongside the plot data.
write.csv(oracle_differences_subset_split_by_dataset$intervals, "../plot_data/comparisons/time/oracle_time_differences_split_by_dataset.csv", row.names = FALSE)
oracle_differences_subset_split_by_dataset$intervals
## # A tibble: 16 x 9
## # Groups: oracle, dataset [2]
## oracle dataset task difference .lower .upper .width .point .interval
## <chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 dziban - … birdstr… 1. Find… -6.99 -37.3 21.6 0.95 mean qi
## 2 dziban - … birdstr… 2. Retr… -7.02 -38.3 22.3 0.95 mean qi
## 3 dziban - … birdstr… 3. Pred… -21.3 -115. 66.3 0.95 mean qi
## 4 dziban - … birdstr… 4. Expl… -23.6 -127. 74.7 0.95 mean qi
## 5 dziban - … movies 1. Find… -6.41 -34.4 20.1 0.95 mean qi
## 6 dziban - … movies 2. Retr… -6.44 -34.8 20.3 0.95 mean qi
## 7 dziban - … movies 3. Pred… -19.5 -106. 61.9 0.95 mean qi
## 8 dziban - … movies 4. Expl… -21.7 -114. 69.4 0.95 mean qi
## 9 dziban - … birdstr… 1. Find… -6.99 -17.0 3.05 0.5 mean qi
## 10 dziban - … birdstr… 2. Retr… -7.02 -17.2 3.13 0.5 mean qi
## 11 dziban - … birdstr… 3. Pred… -21.3 -51.6 9.60 0.5 mean qi
## 12 dziban - … birdstr… 4. Expl… -23.6 -57.6 10.6 0.5 mean qi
## 13 dziban - … movies 1. Find… -6.41 -15.6 2.87 0.5 mean qi
## 14 dziban - … movies 2. Retr… -6.44 -15.7 2.82 0.5 mean qi
## 15 dziban - … movies 3. Pred… -19.5 -47.3 8.89 0.5 mean qi
## 16 dziban - … movies 4. Expl… -21.7 -53.1 9.57 0.5 mean qi
# Expected difference in mean completion time between participant groups
# (student - professional), per task.
participant_group_differences <- expected_diff_in_mean_plot(predictive_data, "participant_group", "Difference in Mean Completion Time (Seconds)", "Task", NULL)
## `summarise()` regrouping output by 'participant_group', 'task' (override with `.groups` argument)
ggsave(file="participant_group_time_differences.png", plot=participant_group_differences$plot, path = "../plots/comparisons/time", width = 7, height = 7)
participant_group_differences$plot
Let’s do the above, but split it by datasets.
# Participant-group differences, additionally split by dataset.
participant_group_differences_split_by_dataset <- expected_diff_in_mean_plot(predictive_data, "participant_group", "Difference in Mean Completion Time (Seconds)", "Task", "dataset")
## `summarise()` regrouping output by 'participant_group', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_participant_group_time_differences.png", plot=participant_group_differences_split_by_dataset$plot, path = "../plots/comparisons/time", width = 7, height = 7)
participant_group_differences_split_by_dataset$plot
We can double-check the boundaries of the credible intervals to be sure whether or not the interval contains zero.
# Persist the interval table. The original call passed `sep = ""`, which
# write.csv ignores with a warning (write.csv fixes sep = ","); dropped.
write.csv(participant_group_differences$intervals,
          "../plot_data/comparisons/time/participant_group_time_differences.csv",
          row.names = FALSE)
participant_group_differences$intervals
## # A tibble: 8 x 8
## # Groups: participant_group [1]
## participant_group task difference .lower .upper .width .point .interval
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 student - professi… 1. Find … 13.5 -15.2 41.9 0.95 mean qi
## 2 student - professi… 2. Retri… 13.7 -15.6 42.8 0.95 mean qi
## 3 student - professi… 3. Predi… 41.5 -47.6 130. 0.95 mean qi
## 4 student - professi… 4. Explo… 45.9 -51.6 144. 0.95 mean qi
## 5 student - professi… 1. Find … 13.5 3.60 23.4 0.5 mean qi
## 6 student - professi… 2. Retri… 13.7 3.64 23.6 0.5 mean qi
## 7 student - professi… 3. Predi… 41.5 11.1 71.5 0.5 mean qi
## 8 student - professi… 4. Explo… 45.9 12.2 79.1 0.5 mean qi
Plot all of the posterior draws on one plot.
# All posterior draws on one plot: distribution of average completion
# time per task, colored by search algorithm, faceted by dataset.
# Fix: alpha is a constant, so it is SET on the layer rather than mapped
# inside aes(); mapping a constant creates a spurious "0.5" alpha legend.
plot <- draw_data %>% ggplot(aes(
  x = .value,
  y = task,
  fill = search
)) +
  stat_halfeye(.width = c(.95, .5), alpha = 0.5) +
  labs(x = "Average Completion Time (Seconds)", y = "Task") +
  facet_grid(. ~ dataset)
plot
## Saving 7 x 5 in image
# Code for additional plots: the comparisons above, restricted to the two
# focal tasks (1. Find Extremum, 2. Retrieve Value).
predictive_data_subset <- predictive_data[predictive_data$task %in% c("1. Find Extremum", "2. Retrieve Value"),]
# Re-factor to drop the two unused task levels from the subset.
predictive_data_subset$task <- factor(predictive_data_subset$task)
# Search differences on the task subset.
search_differences_subset <- expected_diff_in_mean_plot(predictive_data_subset, "search", "Difference in Mean Completion Time (Seconds)", "Task", NULL)
## `summarise()` regrouping output by 'search', 'task' (override with `.groups` argument)
ggsave(file="search_time_differences_subset.png", plot=search_differences_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)
search_differences_subset$plot
# Search differences on the task subset, split by dataset.
diff_in_search_prediction_split_by_dataset_subset <- expected_diff_in_mean_plot(predictive_data_subset, "search", "Difference in Mean Completion Time (Seconds)", "Task", "dataset")
## `summarise()` regrouping output by 'search', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_search_time_differences_subset.png", plot=diff_in_search_prediction_split_by_dataset_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)
diff_in_search_prediction_split_by_dataset_subset$plot
# Oracle differences on the task subset.
oracle_differences_subset <- expected_diff_in_mean_plot(predictive_data_subset, "oracle", "Difference in Mean Completion Time (Seconds)", "Task", NULL)
## `summarise()` regrouping output by 'oracle', 'task' (override with `.groups` argument)
ggsave(file="oracle_time_differences_subset.png", plot=oracle_differences_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)
oracle_differences_subset$plot
# Oracle differences on the task subset, split by dataset.
oracle_differences_subset_split_by_dataset_subset <- expected_diff_in_mean_plot(predictive_data_subset, "oracle", "Difference in Mean Completion Time (Seconds)", "Task", "dataset")
## `summarise()` regrouping output by 'oracle', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_oracle_time_differences_subset.png", plot=oracle_differences_subset_split_by_dataset_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)
oracle_differences_subset_split_by_dataset_subset$plot